%% The script for preprocessing the EEG data

% Author: Thomas (Tien-Thong) Do
% Email: thomas.do@uts.edu.au

%%
eeglabDir = 'E:\Tool\eeglab14_1_2b\';
dataDir = 'C:\Users\EEG Data\';  % data folder
dataCleanDir = 'C:\Users\Processed\';  % data processed folder
dataCleanICA1Dir = [dataCleanDir 'AMICA\'];        
amicaDir = [dataCleanICA1Dir 'amica\'];                 

%%
subjectLabel = {'S15' 'S16' 'S17'}; % add more if you need
            
numb_models = 1;
maxx_threads = 4;
run_amica = 1;
segment_walk = 1;

%% Create the subfolder to store the preprocessing datasets
if ~exist(dataCleanDir)
    mkdir(dataCleanDir);end

if ~exist(amicaDir)
    mkdir(amicaDir);end

%% Log file to check the data deduction
fileIDtoWrite = fopen('sizeDataCheck.txt','w');

%%
% General structure - adaptied from Makoto: https://sccn.ucsd.edu/wiki/Makoto's_preprocessing_pipeline
% Step 3: Downsample the data.            
% Step 4: High-pass filter the data at 1-Hz. 
% Step 5: Import channel info
% Step 6: Remove line noise using CleanLine
% Step 7: Apply clean_rawdata() to reject bad channels and correct continuous data using Artifact Subspace Reconstruction (ASR)
% Step 8: Interpolate all the removed channels
% Step 9: Re-reference the data to average
% Step 10: Run the ICA  
% Step 11: Copy the ICA1, dipole information 
% Step 13: Estimate single equivalent current dipoles            
% Step 14: Save the final dataset
addpath(eeglabDir);

[ALLEEG EEG CURRENTSET ALLCOM] = eeglab;
dipfitdefs;

for numSub = 1:length(subjectLabel) 
    dataName = subjectLabel{numSub};
    filesNeedtoProcessed = dir(fullfile([dataDir subjectLabel{numSub}], '*.cdt'));    
        
    for fileID = 1:size(filesNeedtoProcessed, 1)+1
        if fileID == size(filesNeedtoProcessed, 1)+1            
            try                
                eeglab
                for iii = 1:size(filesNeedtoProcessed, 1)
                    
                    fileName_loc = [filesNeedtoProcessed(iii).folder '\' filesNeedtoProcessed(iii).name];
%                     EEG = pop_loadxdf(fileName_loc, 'streamtype', 'EEG', 'exclude_markerstreams', {});
%                     EEG = loadcurry(fileName_loc);
                    EEG = loadcurry(fileName_loc, 'KeepTriggerChannel', 'True', 'CurryLocations', 'True');

                    [ALLEEG EEG CURRENTSET] = pop_newset(ALLEEG, EEG, 0,'setname',filesNeedtoProcessed(iii).name,'gui','off'); 
                end
                EEG = pop_mergeset( ALLEEG, [1:size(ALLEEG,2)], 0);
                [ALLEEG EEG CURRENTSET] = pop_newset(ALLEEG, EEG, 0,'setname',[dataName '_merged'],'gui','off'); 
                fileNametoSave = dataName;                    
            catch; end;
        else
            display(['DID NOT PROCESS DATASET  ' dataName '_' num2str(fileID)]);
            continue
%             eeglab
%             fileName = [filesNeedtoProcessed(fileID).folder '\' filesNeedtoProcessed(fileID).name];
%             fileNametoSave = filesNeedtoProcessed(fileID).name(1:end-4);        
%             % Step2: Import data.
%             EEG = pop_loadxdf(fileName, 'streamtype', 'EEG', 'exclude_markerstreams', {});
%             EEG.setname = subjectLabel{numSub}; 
%             [ALLEEG EEG CURRENTSET] = pop_newset(ALLEEG, EEG, 0,'setname',fileNametoSave,'gui','off'); 
        end
        
        eeglab redraw
        display(['Processing the dataset  ' fileNametoSave]); 
        nameDataCleaned = [fileNametoSave '_Cleaned'];
        
        fprintf(fileIDtoWrite,'----- Name Data: %s   -----\n',fileNametoSave);

%         if isfile([dataCleanDir nameDataCleaned '.set']) 
%             display(['Already done for : ' nameDataCleaned '.....' ])
%             continue;    % Check if preprocessing already run for this dataset, skip if already done before
%         end
    
%         try
%         moifyingChanLocs;   %update the channellocation because LiveAmps LSL havent channels information
        EEG = pop_select( EEG,'nochannel',{'10' '11' '84' '85' '110' '111' 'HEO' 'VEO' 'EKG' 'EMG' 'TRIGGER'});
%         EEG = pop_select( EEG,'nochannel',{'T9' 'T10' 'FT9' 'FT10' 'FTT9H' 'FTT10H' 'P9' 'P10' 'PO9' 'PO10' 'I1' 'I2' 'IZ' '10' '11' '84' '85' '110' '111' 'HEO' 'VEO' 'EKG' 'EMG' 'TRIGGER'});

        % Step 3: Downsample the data.
        EEG = pop_resample(EEG, 250);

        % Step 4: High-pass filter the data at 1-Hz. Note that EEGLAB uses pass-band edge, therefore 1/2 = 0.5 Hz.
        EEG = pop_eegfiltnew(EEG, 'locutoff',0.5);            
        EEG = pop_eegfiltnew(EEG, 'hicutoff',100);

        % Step 5: Import channel info    
        EEG = pop_chanedit(EEG, 'lookup',[eeglabDir 'plugins/dipfit3.0/standard_BEM/elec/standard_1005.elc'],'eval','chans = pop_chancenter( chans, [],[]);');
        originalEEG = EEG;
        fprintf(fileIDtoWrite,'  Size Original Data: %d %d \n',size(EEG.data));            

        % Step 6: Remove line noise using CleanLine
        EEG = pop_cleanline(EEG, 'bandwidth', 2,'chanlist', [1:EEG.nbchan], 'computepower', 0, 'linefreqs', [50 100 150 200 250],...
            'normSpectrum', 0, 'p', 0.01, 'pad', 2, 'plotfigures', 0, 'scanforlines', 1, 'sigtype', 'Channels', 'tau', 100,...
            'verb', 1, 'winsize', 4, 'winstep', 4);            
        EEG = eeg_checkset(EEG);
        fprintf(fileIDtoWrite,'  Size after pop_cleanline: %d %d  \n',size(EEG.data));

        % Step 7: Clean flatlines
        EEG = clean_flatlines(EEG,3);  % 3s lost data8
        fprintf(fileIDtoWrite,'  Size data after clean_flatlines: %d %d . Remaining %f percent  \n',size(EEG.data), ...
                                                            size(EEG.data,2)/size(originalEEG.data,2)*100);

        % Step 8: Clean noise channels
        EEG = clean_channels(EEG);
        EEG_chan = EEG;    
        fprintf(fileIDtoWrite,'  Size data after clean_channels: %d %d . Remaining %f percent  \n',size(EEG.data), ...
                                                            size(EEG.data,2)/size(originalEEG.data,2)*100);

%             % Step 8.1: Check the continuous data quality
            EEG = pop_rejcont(EEG, 'elecrange', [1:size(EEG.chanlocs,2)],...
                    'freqlimit', [1 100], 'threshold', 10, 'epochlength', 0.5,...
                    'contiguous', 2, 'addlength', 0.25, 'taper', 'hamming');  % continous removal here
%             fprintf(fileIDtoWrite,'  Size data after pop_rejcont 1st time: %d %d . Remaining %f percent  \n',size(EEG.data), ...
%                                                                 size(EEG.data,2)/size(originalEEG.data,2)*100);
%             pop_saveset( EEG, 'filename', [nameDataCleaned(1:3) '_RejCont1_PreICA1'], 'filepath', dataPreICADir);

        % Step 9: Interpolate the missing channels
        EEG = pop_interp(EEG, originalEEG.chanlocs, 'spherical');

        % Step 10: Re-reference
        EEG.nbchan = EEG.nbchan+1;
        EEG.data(end+1,:) = zeros(1, EEG.pnts);
        EEG.chanlocs(1,EEG.nbchan).labels = 'initialReference';
        EEG = pop_reref(EEG, []);
        EEG = pop_select( EEG,'nochannel',{'initialReference'});

        EEG = eeg_checkset(EEG);            
        EEG_Average = EEG;   EEG_Average = eeg_checkset(EEG_Average);  
        pop_saveset( EEG, 'filename', [nameDataCleaned(1:3) '_Average_PreICA'], 'filepath', dataCleanDir);

        % BE VERY CAREFUL IN THIS STEP, THE DATA SEGMENT 
        % Step 11: Check the continuous data quality 1 more time
        EEG = pop_rejcont(EEG, 'elecrange', [1:size(EEG.chanlocs,2)],...
                'freqlimit', [1 100], 'threshold', 10, 'epochlength', 0.5,...
                'contiguous', 2, 'addlength', 0.25, 'taper', 'hamming');  % continous removal here

        fprintf(fileIDtoWrite,'  Size data after pop_rejcont 2nd time: %d %d . Remaining %f percent  \n',size(EEG.data), ...
                                                            size(EEG.data,2)/size(originalEEG.data,2)*100);

        EEG = clean_windows(EEG, 0.15, [], [], [], [], []);
        EEG = eeg_checkset(EEG);      
        fprintf(fileIDtoWrite,'  Size data after clean_windows: %d %d . Remaining %f percent  \n',size(EEG.data), ...
                                                            size(EEG.data,2)/size(originalEEG.data,2)*100);

%             pop_saveset( EEG, 'filename', [nameDataCleaned(1:3) '_ContinuousAutoRejected_PreICA1'], 'filepath', dataPreICADir);  % Save the epoch in the same folder with average dataset
%             vis_artifacts(EEG, EEG_Average)

        %% RUN THE FIRST AMICA HERE

        % Step 12: Run AMICA using calculated data rank with 'pcakeep' option
        if run_amica
            if isfield(EEG.etc, 'clean_channel_mask')
                dataRank = min([rank(double(EEG.data')) sum(EEG.etc.clean_channel_mask)]);
            else
                dataRank = rank(double(EEG.data'));
            end

            dirAmicaSubs = [amicaDir fileNametoSave];
            if ~~exist(dirAmicaSubs)
                mkdir(dirAmicaSubs);end

            runamica15(EEG.data, 'num_chans', EEG.nbchan,...
                'outdir', dirAmicaSubs,...
                'pcakeep', dataRank, 'num_models', 1,...
                'do_reject', 1, 'numrej', 15, 'rejsig', 3, 'rejint', 1);
            EEG.etc.amica  = loadmodout15(dirAmicaSubs);
            EEG.etc.amica.S = EEG.etc.amica.S(1:EEG.etc.amica.num_pcs, :); % Weirdly, I saw size(S,1) be larger than rank. This process does not hurt anyway.
            EEG.icaweights = EEG.etc.amica.W;
            EEG.icasphere  = EEG.etc.amica.S;
            EEG = eeg_checkset(EEG, 'ica');
            rankICA1 = EEG.etc.amica.num_pcs;   % for the second AMICA
            numICs = rankICA1;
        else
            EEG = pop_runica( EEG, 'runica');
%                 [w,s] = runica(EEG.data);
            disp('runica successfull, storing weights and sphere.');
%                 EEG.icaweights = w;
%                 EEG.icasphere = s;
            EEG = eeg_checkset(EEG, 'ica');
            numICs = size(EEG.icaweights,1);
        end

        % Copy the ICA to the original EEG data
        EEG_Average.icaweights = EEG.icaweights;
        EEG_Average.icasphere = EEG.icasphere;            
        EEG_Average = eeg_checkset(EEG_Average);


        % Step 14: Estimate single equivalent current dipoles
        EEG = [];
        EEG = EEG_Average;
        EEG = eeg_checkset(EEG);
        [newlocs coordinateTransformParameters] = coregister(EEG.chanlocs, template_models(2).chanfile, 'warp', 'auto', 'manual', 'off');
        templateChannelFilePath = [eeglabDir 'plugins/dipfit3.0/standard_BEM/elec/standard_1005.elc'];
        hdmFilePath             = [eeglabDir 'plugins/dipfit3.0/standard_BEM/standard_vol.mat'];
        EEG = pop_dipfit_settings( EEG, 'hdmfile', hdmFilePath, 'coordformat', 'MNI',...
            'mrifile', [eeglabDir 'plugins/dipfit3.0/standard_BEM/standard_mri.mat'],...
            'chanfile', templateChannelFilePath, 'coord_transform', coordinateTransformParameters,...
            'chansel', 1:EEG.nbchan);
        EEG = pop_multifit(EEG, 1:EEG.nbchan,'threshold', 100, 'dipplot','off','plotopt',{'normlen' 'on'});

        % Save the dataset
        EEG = pop_saveset( EEG, 'filename', nameDataCleaned, 'filepath', dataCleanDir);

        pop_topoplot(EEG,0, [1:numICs] ,[nameDataCleaned '_ICA'],[] ,1,'electrodes','off');
        print([dataCleanDir nameDataCleaned '_ICA.png'],'-dpng');
        close
            
    end
end

fclose(fileIDtoWrite);
 